---
title: "Appendix: How do strikes and lockouts affect applications to Danish public service professional education programs? A synthetic control analysis"
format: 
  pdf:
    toc: true
    toc-depth: 6
    self-contained: true
    code-fold: false
editor: source
author:
  - name: Christian Heide
    affiliations:
      - ref: uni1
  - name: Florian Keppeler
    affiliations:
      - ref: uni1
affiliations:
  - id: uni1
    name: Department of Political Science, Aarhus University, Denmark
theme: flatly
execute: 
  cache: false
  echo: false
  warning: false
  message: false
  fig-width: 10
  fig-height: 6
editor_options: 
  chunk_output_type: console
fig-cap-location: top
header-includes: |
  \usepackage{caption}
  \usepackage{threeparttable}
  \captionsetup[table]{position=top}
  \captionsetup[figure]{position=top}
  \renewcommand{\thetable}{S1.\arabic{table}} 
  \renewcommand{\thefigure}{S1.\arabic{figure}}
  \usepackage{float}
  \usepackage{siunitx}
---


```{r}
# Libraries
library(tidyverse)
library(gsynth)
library(panelView)
library(xtable)
library(patchwork)
library(stargazer)
library(kableExtra)

# Data: processed application panel, located relative to the project root.
# here::here() joins the path components itself, so no manual "/" pasting.
df <- readRDS(here::here("input", "processed", "applications_1997-2024.RDS"))
```



```{r}
 # To impute missing values
#' Impute missing values of one variable for one education program
#'
#' Fits `lm(missing_var ~ rhs)` on the rows of `df` belonging to `education`
#' and replaces the missing values of `missing_var` in those rows with the
#' model predictions. Rows of other programs are never touched.
#'
#' @param df Data frame with a column `uddannelse`, the column named in
#'   `missing_var`, and the variables used in `rhs`.
#' @param missing_var Name (string) of the column to impute.
#' @param rhs Right-hand side of the model formula, as a string.
#' @param education Value of `uddannelse` identifying the program.
#' @return `df`, with the missing values filled in where a model could be
#'   fitted; unchanged if nothing is missing or nothing is observed.
lm_impute <- function(df, missing_var, rhs, education) {

  # which() drops rows where the comparison is NA
  edu_rows <- which(df$uddannelse == education)

  # Rows of this program with a missing value
  missing_rows <- edu_rows[is.na(df[[missing_var]][edu_rows])]

  # Nothing to impute: return early instead of fitting a model whose
  # predictions would not be used (and whose failure would abort the call)
  if (length(missing_rows) == 0) {
    return(df)
  }

  # Only fit when at least one value of the program is observed
  if (length(edu_rows) > length(missing_rows)) {

    temp_model <- lm(
      formula = as.formula(paste(missing_var, "~", rhs)),
      data = df[edu_rows, , drop = FALSE],
      na.action = na.exclude
    )

    # Predict only for the rows that are missing
    df[[missing_var]][missing_rows] <- predict(
      temp_model,
      newdata = df[missing_rows, , drop = FALSE]
    )
  }

  df
}
```



```{r}
# Builds the two-panel figure for one fitted gsynth model.
#
# Arguments:
#   fit             Fitted model object; the elements "time", "Y.tr", "Y.ct",
#                   "Y.dat", "est.att" and (for pre_se = "mspe") "MSPE" are
#                   read.
#   education_name  Name of the treated unit. Used as label, to drop the
#                   treated series from the donor lines and to pick the
#                   prepared data frame (teacher_df, nurse_df_k1, nurse_df_k2,
#                   read from the calling environment) that decides which
#                   donor series are complete.
#   t0              Position of the dotted vertical line.
#   pre_se          "mspe" replaces the interval in rows with n.Treated == 0
#                   by estimate +/- 1.96 * sqrt(MSPE / number of such rows);
#                   any other value keeps the intervals stored in the fit.
#
# Returns an unnamed list: [[1]] panel A (observed vs. synthetic, donors in
# grey), [[2]] panel B (estimate with interval ribbon).
report_results <- function(fit, education_name, t0, pre_se = "bootstrapped") {

  # Prepared data for the treated program; NULL for any other name
  panel_df <- switch(
    education_name,
    "Folkeskolelærer" = filter(teacher_df, år <= 2019),
    "Sygeplejerske (kvote 1)" = nurse_df_k1,
    "Sygeplejerske (kvote 2)" = nurse_df_k2,
    NULL)

  # Programs with the maximum number of non-missing outcome rows
  complete_df <- NULL
  if (!is.null(panel_df)) {
    complete_df <- panel_df |>
      filter(!is.na(ansøgninger_main)) |>
      group_by(uddannelse) |>
      summarise(n = n()) |>
      filter(n == max(n))
  }

  # Observed and synthetic series of the treated unit
  ct_df <- data.frame(
    education = education_name,
    t = as.vector(fit[["time"]]),
    observed = as.vector(fit[["Y.tr"]]),
    sc = as.vector(fit[["Y.ct"]]),
    t0 = t0)

  # All unit series in long format, treated unit removed
  id_cols <- c("education", "t", "observed", "sc", "t0")
  controls_df <- cbind(ct_df, fit[["Y.dat"]]) |>
    pivot_longer(
      cols = -all_of(id_cols),
      names_to = "control_education",
      values_to = "applications") |>
    filter(control_education != education_name)

  # Only complete data series are drawn
  if (!is.null(complete_df)) {
    controls_df <- filter(
      controls_df,
      control_education %in% complete_df$uddannelse)
  }

  # Gap between observed and synthetic, next to the stored estimates
  gap_df <- bind_cols(
    mutate(ct_df, estimate = observed - sc),
    as.data.frame(fit[["est.att"]]))

  if (pre_se == "mspe") {
    n_pre <- sum(as.vector(fit[["est.att"]][, "n.Treated"]) == 0)
    half_width <- 1.96 * sqrt(fit[["MSPE"]] / n_pre)

    gap_df <- gap_df |>
      mutate(
        CI.lower = if_else(n.Treated == 0, estimate - half_width, CI.lower),
        CI.upper = if_else(n.Treated == 0, estimate + half_width, CI.upper))
  }

  # Panel A
  linetypes <- c(
    "Observed" = "solid",
    "Synthetic" = "dashed",
    "Donor education programs" = "solid")

  ct_plot <- ggplot(ct_df, aes(x = t, y = observed)) +
    geom_line(aes(linetype = "Observed"), color = "black", linewidth = 1) +
    geom_line(aes(y = sc, linetype = "Synthetic"), linewidth = 1) +
    geom_vline(aes(xintercept = t0), linetype = "dotted") +
    geom_line(
      data = controls_df,
      mapping = aes(
        x = t,
        y = applications,
        group = control_education,
        linetype = "Donor education programs"),
      inherit.aes = FALSE,
      color = "grey",
      alpha = 0.5) +
    scale_linetype_manual(values = linetypes) +
    scale_x_continuous(expand = c(0, 0)) +
    theme_bw() +
    theme(legend.position = "bottom") +
    labs(x = NULL, y = "Applications", linetype = NULL) +
    ggtitle("A. Observed and counterfactual development")

  # Panel B
  gap_plot <- ggplot(gap_df, aes(x = t, y = estimate)) +
    geom_line(color = "black", linewidth = 1) +
    geom_ribbon(aes(ymin = CI.lower, ymax = CI.upper), alpha = 0.3) +
    geom_hline(yintercept = 0) +
    geom_vline(aes(xintercept = t0), linetype = "dotted") +
    scale_x_continuous(expand = c(0, 0)) +
    theme_bw() +
    labs(x = NULL, y = "Estimate") +
    ggtitle("B. Effect estimates")

  list(ct_plot, gap_plot)
}


```

\newpage

# Data

```{r}
# Everything except the teacher lockout rows. The alternative quota 2 share
# is the per-program median of the observed shares; it is computed before the
# linear imputation below, so it is based on observed values only.
nurse_df_total <- df |>
  filter(treatment != "Lærerlockout") |>
  group_by(uddannelse) |>
  mutate(
    andel_kvote2_alternativ = if_else(
      is.na(andel_kvote2_prio1),
      median(andel_kvote2_prio1, na.rm = TRUE),
      andel_kvote2_prio1)) |>
  ungroup()

# Linear imputation of the missing quota 2 shares, program by program
for (u in unique(nurse_df_total$uddannelse)) {
  nurse_df_total <- lm_impute(
    nurse_df_total,
    missing_var = "andel_kvote2_prio1", rhs = "år", education = u)
  nurse_df_total <- lm_impute(
    nurse_df_total,
    missing_var = "andel_kvote2_total", rhs = "år", education = u)
}

nurse_df_total <- nurse_df_total |>
  mutate(
    # Number of quota 2 applications: share times applications before 2009,
    # the recorded number otherwise
    ansøgninger_prio1_kvote2 = if_else(
      år < 2009,
      ansøgninger_prio1 * andel_kvote2_prio1,
      ansøgninger_prio1_kvote2),
    ansøgninger_prio1_kvote2_alternativ = if_else(
      år < 2009,
      ansøgninger_prio1 * andel_kvote2_alternativ,
      ansøgninger_prio1_kvote2),
    ansøgninger_total_kvote2 = if_else(
      år < 2009,
      ansøgninger_total * andel_kvote2_total,
      ansøgninger_total_kvote2),
    # One common name for the treated program
    uddannelse = if_else(
      treatment == "Sygeplejerskestrejke",
      "Sygeplejerske",
      uddannelse))

# Quota 1: first-priority (or total) applications minus the quota 2 part.
# The treated program is relabelled for plotting convenience.
nurse_df_k1 <- mutate(
  nurse_df_total,
  ansøgninger_main = ansøgninger_prio1 - ansøgninger_prio1_kvote2,
  ansøgninger_main_robust = ansøgninger_prio1 - ansøgninger_prio1_kvote2_alternativ,
  ansøgninger_robust_total = ansøgninger_total - ansøgninger_total_kvote2,
  uddannelse = if_else(
    treatment == "Sygeplejerskestrejke",
    "Sygeplejerske (kvote 1)",
    uddannelse))

# Quota 2: the quota 2 counts themselves, same relabelling
nurse_df_k2 <- mutate(
  nurse_df_total,
  ansøgninger_main = ansøgninger_prio1_kvote2,
  ansøgninger_main_robust = ansøgninger_prio1_kvote2_alternativ,
  ansøgninger_robust_total = ansøgninger_total_kvote2,
  uddannelse = if_else(
    treatment == "Sygeplejerskestrejke",
    "Sygeplejerske (kvote 2)",
    uddannelse))


```




```{r}
# Share of applicants with a grade above 7, assuming Weibull-distributed
# grades. The shape is fit to match 2016 applications.
# NOTE(review): scale = grade / gamma(1 + 1 / shape) makes `grade` the MEAN
# of the Weibull distribution, while the text calls `karakter` a median.
# The shape was calibrated under this parametrisation, so it is left as is;
# confirm that this is intended.
shape <- 2.2
share_above_7 <- function(grade, shape) {
  1 - pweibull(7, shape = shape, scale = grade / gamma(1 + 1 / shape))
}

# No grade data before 2009, so the program's 2009 value (main) or the
# program's observed median (alternative) is imputed.
teacher_df <- df |>
  filter(treatment != "Sygeplejerskestrejke" & år <= 2019) |>
  group_by(uddannelse) |>
  mutate(
    karakter_alternativ = if_else(
      is.na(karakter),
      median(karakter, na.rm = TRUE),
      karakter),
    # The program's 2009 grade; NA when the program has no 2009 row.
    # (The previous guard tested for a 2010 row while reading the 2009 value.)
    grade_2009 = karakter[which(år == 2009)][1],
    karakter = ifelse(år < 2009 & is.na(karakter), grade_2009, karakter)
  ) |>
  select(-grade_2009) |>
  ungroup() |>
  mutate(
    # Main measure and robustness measure based on the imputed median
    andel_over7 = share_above_7(karakter, shape = shape),
    andel_over7_alternativ = share_above_7(karakter_alternativ, shape = shape),
    # Applications with a grade above 7
    ansøgninger_main = ansøgninger_prio1 * andel_over7,
    ansøgninger_main_robust = ansøgninger_prio1 * andel_over7_alternativ,
    ansøgninger_robust_total = ansøgninger_total * andel_over7,
    ansøgninger_robust_uopdelt = ansøgninger_prio1,
    uddannelse = if_else(
      treatment == "Lærerlockout",
      "Folkeskolelærer",
      uddannelse))


```

## Table S1: Descriptive statistics

```{r}
# Descriptive statistics for the rows of `df` selected by the logical vector
# `keep`, in the order of the `Variable` column below.
# `n_programs` overrides the number of distinct programs; it is used for the
# two treated programs, which are a single unit each by construction.
# The number of observations is the number of rows with a non-missing count
# of first-priority applications (computed for every column instead of being
# hard-coded for the treated programs).
describe_rows <- function(keep, n_programs = NULL) {
  rows <- df[which(keep), ]
  if (is.null(n_programs)) {
    n_programs <- length(unique(rows$uddannelse))
  }
  c(
    round(mean(rows$ansøgninger_prio1, na.rm = TRUE), 1),
    round(mean(rows$ansøgninger_total, na.rm = TRUE), 1),
    round(mean(rows$andel_kvinder_prio1, na.rm = TRUE) * 100, 1),
    round(mean(rows$alder, na.rm = TRUE), 1),
    round(mean(rows$karakter, na.rm = TRUE), 1),
    n_programs,
    sum(!is.na(rows$ansøgninger_prio1)))
}

is_control <- df$treatment == "Kontrol"

summary_stats <- data.frame(
  Variable = c(
    "First priority applications",
    "Total applications",
    "Share female (%)",
    "Average age",
    "Median grade",
    "Number of education programs",
    "Observations"),
  Teacher = describe_rows(df$treatment == "Lærerlockout", n_programs = 1),
  Nurse = describe_rows(df$treatment == "Sygeplejerskestrejke", n_programs = 1),
  `Donors` = describe_rows(is_control & df$uddannelsesniveau == "Prof. bachelor"),
  `All` = describe_rows(is_control)
  )

summary_stats |> 
  kableExtra::kable() |> 
  kableExtra::kable_styling(latex_options = c("scale_down", "HOLD_position"))
```

The table shows the average number of first-priority applications, the average number of total applications, the share of applicants who are female, the average age of applicants, the average of the median grade among applicants, the number of education programs, and the number of observations (data points). These metrics are shown for the teacher and nurse programs, for the donor pool of Professional Bachelor's Degree programs, and for all control education programs across levels.

\newpage

## Figure S1: Visualization of imputation for nurses

```{r}
# Nurse program only, both quotas stacked; one row per year, quota and
# imputation method. Years before 2009 (imputed) are drawn dashed.
bind_rows(
  nurse_df_k1 |>
    filter(treatment == "Sygeplejerskestrejke") |>
    mutate(quota = "Quota 1"),
  nurse_df_k2 |>
    filter(treatment == "Sygeplejerskestrejke") |>
    mutate(quota = "Quota 2")) |>
  pivot_longer(
    cols = c(ansøgninger_main, ansøgninger_main_robust),
    values_to = "ansøgninger_main") |>
  mutate(
    name = if_else(
      name == "ansøgninger_main",
      "Linear imputing (main)",
      "Median imputed (alternative)")) |>
  ggplot(aes(x = år, y = ansøgninger_main)) +
  geom_line(aes(linetype = år < 2009)) +
  scale_linetype_manual(values = c("TRUE" = "dashed", "FALSE" = "solid")) +
  geom_vline(xintercept = 2008.5, linetype = "dotted") +
  facet_grid(rows = vars(name), cols = vars(quota)) +
  theme_bw() +
  theme(legend.position = "none") +
  labs(
    y = "Applications",
    x = NULL,
    linetype = NULL,
    title = "Imputed (dashed) and actual (solid) applications")
```

For nurses it is necessary to split applications into quota 1 and quota 2. However, before 2009 the share of applications in quota 2 is not available. Therefore we impute this share. For the main analysis, this is done by imputing a linear trend (top panel). As a robustness check we also impute the median share post-2009 (bottom panel). As can be seen, the two methods generate similar results, and as shown further down, the results are robust to the choice of imputation method. Further, results are robust to not splitting into quota 1 and 2, so the findings do not hinge on the imputation. But as described in the main text, results should be more valid when split into quota 1 and 2. 

\newpage


## Figure S2: Visualization of imputation for teachers

```{r}
# Teacher program only; the same rows are stacked three times, once per
# application measure, and drawn with one linetype per measure.
teacher_df |>
  filter(uddannelse == "Folkeskolelærer") |>
  (\(tch) bind_rows(
    mutate(
      tch,
      measure = "Total priority 1",
      ansøgninger_main = ansøgninger_prio1),
    mutate(
      tch,
      measure = "GPA > 7 (estimated, 2009 value imputed)"),
    mutate(
      tch,
      ansøgninger_main = ansøgninger_main_robust,
      measure = "GPA > 7 (estimated, median value imputed)")))() |>
  ggplot(aes(x = år, y = ansøgninger_main, linetype = measure)) +
  geom_line() +
  theme_bw() +
  labs(
    y = "Applications",
    x = NULL,
    linetype = "Application measure",
    title = "Applications to the teacher-programs")
```

For teachers we need to limit applicants to those with a GPA above 7 to avoid confounding. As this number is not observed, we estimate it by assuming a Weibull distribution of grades around the median grade. The parameters of the distribution are chosen so that the distribution of grades matches that in 2016 (the only year for which the distribution is publicly available).

However, before 2009 the median grade is not observed. For our main analysis we simply impute the 2009 median pre-2009. As an alternative mode of imputing, we impute the median value post-2009. Both measures are shown above, and below we show that the results are robust to the method used for imputing. Further, results are robust to including applicants with all levels of GPA (but as described in the main text, limiting to applicants with a GPA above 7 should generate the most valid estimates). \newpage


# Study 1 (teachers)


```{r, include=FALSE}
# Main model, study 1: donor pool restricted to professional bachelor
# programs, estimator "mc", two-way fixed effects, 1000 bootstrap runs
gsk_df <- filter(teacher_df, uddannelsesniveau == "Prof. bachelor")

fit_teacher_main <- gsynth(
  data = gsk_df,
  index = c("uddannelse", "år"),
  Y = "ansøgninger_main",
  D = "d",
  X = c("andel_kvinder_prio1", "karakter"),
  estimator = "mc",
  force = "two-way",
  min.T0 = 5,
  na.rm = TRUE,
  se = TRUE,
  nboots = 1000,
  seed = 4
)

```


## Table S2: Main results study 1

```{r}
# Year-by-year observed and synthetic outcome of the main teacher model,
# with the gap in absolute terms and relative to the synthetic value
results <- data.frame(
  Year = as.vector(fit_teacher_main[["time"]]),
  Observed = as.vector(fit_teacher_main[["Y.tr"]]),
  Synthetic = as.vector(fit_teacher_main[["Y.ct"]]))
results$ATT <- results$Observed - results$Synthetic
results[["ATT (%)"]] <- (results$ATT / results$Synthetic) * 100

kableExtra::kable_styling(
  kableExtra::kable(results),
  latex_options = c("scale_down", "HOLD_position"))
```

The table shows the observed number of first-priority applications as well as the synthetic control-estimate of these. Additionally, the table shows the year-specific Average Treatment Effect on the Treated (ATT) as well as this quantity in percentage terms (ATT (%)).

\newpage

## Figure S3: Robustness - All priority 1 applications (not only above 7)

```{r, include=FALSE}
# Robustness: outcome is all first-priority applications (not restricted to
# grades above 7); professional bachelor donors
gsk_df <- filter(teacher_df, uddannelsesniveau == "Prof. bachelor")

fit_teacher_all <- gsynth(
  data = gsk_df,
  index = c("uddannelse", "år"),
  Y = "ansøgninger_robust_uopdelt",
  D = "d",
  X = c("andel_kvinder_prio1", "karakter"),
  estimator = "mc",
  force = "two-way",
  nlambda = 20,
  min.T0 = 4,
  na.rm = TRUE,
  se = TRUE,
  nboots = 1000,
  seed = 4
)

```

```{r}
# Panels A and B for the all-applications model, side by side
teacher_all <- report_results(fit_teacher_all, "Folkeskolelærer", t0 = 2012)

teacher_all[[1]] | teacher_all[[2]]
```

Panel A of the figure shows the observed number of first-priority applications as well as the synthetic control estimate of these. Additionally, the individual donor units are shown in shaded grey. Panel B shows the yearly effect estimate (ATT) with 95%-confidence interval in shaded grey.

In the model all priority 1-applications are included. As such, we do not limit to only applicants with a grade average above 7 as is done in the main model.

\newpage

## Figure S4: Robustness - All levels of educations included as donors

```{r, include=FALSE}
# Robustness: the full teacher_df (all education levels) as donor pool
fit_teacher_all_levels <- gsynth(
  data = teacher_df,
  index = c("uddannelse", "år"),
  Y = "ansøgninger_main",
  D = "d",
  X = c("andel_kvinder_prio1", "karakter"),
  estimator = "mc",
  force = "two-way",
  nlambda = 20,
  min.T0 = 5,
  na.rm = TRUE,
  se = TRUE,
  nboots = 1000,
  seed = 4
)

```

```{r}
# Panels A and B for the all-levels donor pool, side by side
teacher_all_levels <- report_results(
  fit_teacher_all_levels, "Folkeskolelærer", t0 = 2012)

teacher_all_levels[[1]] | teacher_all_levels[[2]]
```
Panel A of the figure shows the observed number of first-priority applications as well as the synthetic control estimate of these. Additionally, the individual donor units are shown in shaded grey. Panel B shows the yearly effect estimate (ATT) with 95%-confidence interval in shaded grey.

In the model all levels of education programs are included in the donor pool. As such, we do not limit the donor pool to only Professional Bachelor's Degrees as is done in the main model.

\newpage

## Figure S5: Robustness - IFE estimator (crossvalidated, r = 3)

```{r, include=FALSE}
# Robustness: EM = TRUE with cross-validation (CV = TRUE) and parametric
# inference; professional bachelor donors.
# The trailing comma after the last argument was removed: it passed an empty
# extra argument to gsynth().
gsk_df <- teacher_df |> 
  filter(uddannelsesniveau == "Prof. bachelor")

fit_teacher_ife_cv <- gsynth(
  Y = "ansøgninger_main",
  D = "d",
  X = c("andel_kvinder_prio1", "karakter"),
  data = gsk_df,
  index = c("uddannelse", "år"),
  na.rm = TRUE,
  EM = TRUE,
  se = TRUE,
  CV = TRUE,
  force = "two-way",
  inference = "parametric",
  min.T0 = 7,
  nboots = 1000,
  seed = 4,
  nlambda = 20
)
```

```{r}
# Panels A and B for the cross-validated IFE model, side by side
teacher_ife_cv <- report_results(
  fit_teacher_ife_cv, "Folkeskolelærer", t0 = 2012)

teacher_ife_cv[[1]] | teacher_ife_cv[[2]]
```
Panel A of the figure shows the observed number of first-priority applications as well as the synthetic control estimate of these. Additionally, the individual donor units are shown in shaded grey. Panel B shows the yearly effect estimate (ATT) with 95%-confidence interval in shaded grey.

Here we use the alternative IFE-estimator of the synthetic control. The r-hyperparameter is set to 3, which is obtained through cross-validation.
\newpage

## Figure S6: Robustness - IFE estimator (r = 5)

```{r, include=FALSE}
# Robustness: EM = TRUE with the number of factors fixed at r = 5
# (CV = FALSE) and parametric inference; professional bachelor donors.
# The trailing comma after the last argument was removed: it passed an empty
# extra argument to gsynth().
gsk_df <- teacher_df |> 
  filter(uddannelsesniveau == "Prof. bachelor")

fit_teacher_ife_5 <- gsynth(
  Y = "ansøgninger_main",
  D = "d",
  X = c("andel_kvinder_prio1", "karakter"),
  data = gsk_df,
  index = c("uddannelse", "år"),
  na.rm = TRUE,
  EM = TRUE,
  se = TRUE,
  CV = FALSE,
  r = 5,
  force = "two-way",
  inference = "parametric",
  min.T0 = 7,
  nboots = 1000,
  seed = 4,
  nlambda = 20
)
```

```{r}
# Panels A and B for the IFE model with r = 5, side by side
teacher_ife_5 <- report_results(
  fit_teacher_ife_5, "Folkeskolelærer", t0 = 2012)

teacher_ife_5[[1]] | teacher_ife_5[[2]]
```
Panel A of the figure shows the observed number of first-priority applications as well as the synthetic control estimate of these. Additionally, the individual donor units are shown in shaded grey. Panel B shows the yearly effect estimate (ATT) with 95%-confidence interval in shaded grey.

Here we use the alternative IFE-estimator of the synthetic control. The r-hyperparameter is set to the maximum value of 5.
\newpage

## Figure S7: Robustness - Balanced panel

```{r, include=FALSE}
# Robustness: only professional bachelor programs with at least 23 rows in
# teacher_df are kept (column n = rows per program)
gsk_df <- teacher_df |>
  filter(uddannelsesniveau == "Prof. bachelor") |>
  add_count(uddannelse, name = "n") |>
  filter(n >= 23)

fit_teacher_balanced <- gsynth(
  data = gsk_df,
  index = c("uddannelse", "år"),
  Y = "ansøgninger_main",
  D = "d",
  X = c("andel_kvinder_prio1", "karakter"),
  estimator = "mc",
  force = "two-way",
  nlambda = 20,
  min.T0 = 5,
  na.rm = TRUE,
  se = TRUE,
  nboots = 1000,
  seed = 4
)

```

```{r}
# Panels A and B for the balanced-panel model, side by side
teacher_balanced <- report_results(
  fit_teacher_balanced, "Folkeskolelærer", t0 = 2012)

teacher_balanced[[1]] | teacher_balanced[[2]]
```
Panel A of the figure shows the observed number of first-priority applications as well as the synthetic control estimate of these. Additionally, the individual donor units are shown in shaded grey. Panel B shows the yearly effect estimate (ATT) with 95%-confidence interval in shaded grey.

In this model we only include a balanced panel of donor units, i.e. we only include donor units that are observed throughout the entire period.
\newpage

## Table S3: Implied donor unit weights (from IFE)

```{r}
# Danish program names (as they appear in the data) -> English labels
translations <- c(
  "Beredskab -,  katastrofe- og risikomanagement, prof. bach." = "Emergency, Disaster and Risk Management",
  "Bioanalytiker, prof. bach." = "Biomedical Laboratory Scientist",
  "Bygningskonstruktør, prof. bach." = "Architectural Technologist",
  "Diakoni og socialpædagogik, prof. bach." = "Diaconia and Social Education",
  "Diplomingeniør, prof. bach." = "Bachelor of Engineering",
  "Ergoterapeut, prof. bach." = "Occupational Therapist",
  "Ernæring og sundhed, prof. bach." = "Nutrition and Health",
  "Event management og økonomi, prof. bach." = "Event Management and Economics",
  "Fremmedsprog og digital markedskommunikation, prof. bach." = "Foreign Languages and Digital Marketing Communication",
  "Fysioterapeut, prof. bach." = "Physiotherapist",
  "Global Nutrition and Health, prof. bach." = "Global Nutrition and Health",
  "Kommunikation, prof. bach." = "Communication",
  "Offentlig administration, prof. bach." = "Public Administration",
  "Optometri, prof. bach." = "Optometry",
  "Procesøkonomi og værdikædeledelse, prof. bach." = "Process Economics and Value Chain Management",
  "Professionsbachelor i finans, prof. bach." = "Finance",
  "Pædagog, prof. bach." = "Pedagogue",
  "Radiograf, prof. bach." = "Radiographer",
  "Skov- og landskabsingeniør, prof. bach." = "Forest and Landscape Engineer",
  "Socialrådgiver, prof. bach." = "Social Worker",
  "Tandplejer, prof. bach." = "Dental Hygienist",
  "Økonomi og informationsteknologi, prof. bach." = "Economics and Information Technology"
)

# Implied weights of the cross-validated IFE fit, largest first.
# A donor without an entry in `translations` keeps its original name instead
# of showing up as NA in the table.
fit_teacher_ife_cv[["wgt.implied"]] |> 
  as.data.frame() |> 
  tibble::rownames_to_column("Education") |>
  mutate(Education = coalesce(unname(translations[Education]), Education)) |> 
  rename(Weight = Folkeskolelærer) |> 
  arrange(desc(Weight)) |> 
  kableExtra::kable() |> 
  kableExtra::kable_styling(latex_options = c("scale_down", "HOLD_position"))
```
The table shows the implied donor unit weights for all donor units using the alternative IFE-estimator. These weights are mainly for illustrative purposes, as the IFE-estimator is not used in the main analysis. However, as shown in figure S5 the results are reproduced using the IFE-estimator.
\newpage

## Figure S8: Robustness - Excluding diploma engineering

```{r, include=FALSE}
# Robustness: professional bachelor donors without the diploma engineering
# program
gsk_df <- filter(
  teacher_df,
  uddannelsesniveau == "Prof. bachelor" & uddannelse != "Diplomingeniør, prof. bach.")

fit_teacher_excluding_ing <- gsynth(
  data = gsk_df,
  index = c("uddannelse", "år"),
  Y = "ansøgninger_main",
  D = "d",
  X = c("andel_kvinder_prio1", "karakter"),
  estimator = "mc",
  force = "two-way",
  min.T0 = 5,
  na.rm = TRUE,
  se = TRUE,
  nboots = 1000,
  seed = 4
)

```

```{r}
# Panels A and B for the model without diploma engineering, side by side.
# Despite its name, `teacher_main` holds the plots of this robustness model.
teacher_main <- report_results(
  fit_teacher_excluding_ing, "Folkeskolelærer", t0 = 2012)

teacher_main[[1]] | teacher_main[[2]]
```

Panel A of the figure shows the observed number of first-priority applications as well as the synthetic control estimate of these. Additionally, the individual donor units are shown in shaded grey. Panel B shows the yearly effect estimate (ATT) with 95%-confidence interval in shaded grey.

The diploma engineering program achieves a large placebo estimate, indicating that it might not be suitable for inclusion as a donor unit. Therefore we assess whether the results are robust to excluding it.
\newpage

## Figure S9: Robustness - Alternative imputing

```{r, include=FALSE}
# Robustness: outcome and grade covariate based on the median-imputed grade.
# The fit gets its own name; it previously overwrote `fit_teacher_main`, the
# object behind the main results table, with this robustness model.
gsk_df <- teacher_df |> 
  filter(uddannelsesniveau == "Prof. bachelor")


fit_teacher_alt_impute <- gsynth(
  Y = "ansøgninger_main_robust",
  D = "d",
  X = c("andel_kvinder_prio1", "karakter_alternativ"),
  data = gsk_df,
  index = c("uddannelse", "år"),
  na.rm = TRUE,
  se = TRUE,
  force = "two-way",
  estimator = "mc",
  min.T0 = 5,
  nboots = 1000,
  seed = 4
)

```


```{r}
# Panels A and B for the alternative-imputation model, side by side
teacher_main <- report_results(
  fit = fit_teacher_alt_impute,
  education_name = "Folkeskolelærer",
  t0 = 2012)

teacher_main[[1]] | teacher_main[[2]]
```
Panel A of the figure shows the observed number of first-priority applications as well as the synthetic control estimate of these. Additionally, the individual donor units are shown in shaded grey. Panel B shows the yearly effect estimate (ATT) with 95%-confidence interval in shaded grey.

Here we use the alternative imputation-method described below figure S2.
\newpage

# Study 2 (nurses)


```{r, include=FALSE}
# Main model, study 2: quota 1 applications, professional bachelor donors,
# estimator "mc", two-way fixed effects, 1000 bootstrap runs
gsk_df <- filter(nurse_df_k1, uddannelsesniveau == "Prof. bachelor")

fit_nurse_k1_main <- gsynth(
  data = gsk_df,
  index = c("uddannelse", "år"),
  Y = "ansøgninger_main",
  D = "d",
  X = "andel_kvinder_prio1",
  estimator = "mc",
  force = "two-way",
  nlambda = 20,
  min.T0 = 5,
  na.rm = TRUE,
  se = TRUE,
  nboots = 1000,
  seed = 4
)

```


## Table S4: Main results study 2

```{r}
# Year-by-year observed and synthetic outcome of the main nurse model,
# with the gap in absolute terms and relative to the synthetic value
results <- data.frame(
  Year = as.vector(fit_nurse_k1_main[["time"]]),
  Observed = as.vector(fit_nurse_k1_main[["Y.tr"]]),
  Synthetic = as.vector(fit_nurse_k1_main[["Y.ct"]]))
results$ATT <- results$Observed - results$Synthetic
results[["ATT (%)"]] <- (results$ATT / results$Synthetic) * 100

kableExtra::kable_styling(
  kableExtra::kable(results),
  latex_options = c("scale_down", "HOLD_position"))
```
The table shows the observed number of first-priority applications as well as the synthetic control-estimate of these. Additionally, the table shows the year-specific Average Treatment Effect on the Treated (ATT) as well as this quantity in percentage terms (ATT (%)).
\newpage

## Figure S10: Robustness - Joint estimation of quota 1 and 2 (quota 2 t0 = 2022)

```{r, include=FALSE}

# Joint model: professional bachelor donors (outcome = all first-priority
# applications) stacked with the quota 1 and quota 2 nurse series as two
# separate treated units. For quota 2 the treatment indicator is set to 0
# in 2021.
gsk_df <- bind_rows(
  nurse_df_total |>
    filter(
      treatment != "Sygeplejerskestrejke" &
        uddannelsesniveau == "Prof. bachelor") |>
    mutate(ansøgninger_main = ansøgninger_prio1),
  nurse_df_k1 |>
    filter(treatment == "Sygeplejerskestrejke"),
  nurse_df_k2 |>
    filter(treatment == "Sygeplejerskestrejke") |>
    mutate(d = if_else(år == 2021, 0, d)))

fit_nurse_joint <- gsynth(
  data = gsk_df,
  index = c("uddannelse", "år"),
  Y = "ansøgninger_main",
  D = "d",
  X = "andel_kvinder_prio1",
  estimator = "mc",
  force = "two-way",
  nlambda = 20,
  min.T0 = 5,
  na.rm = TRUE,
  se = TRUE,
  nboots = 1000,
  seed = 4
)

```

```{r}

# Built-in plots of the joint fit, requested with type = "ct" and
# type = "gap". Kept as two top-level calls so each plot is emitted by the
# chunk.
plot(fit_nurse_joint, type = "ct") 
plot(fit_nurse_joint, type = "gap")

```
The two plots show the results of estimating quota 1 and quota 2 jointly instead of separately. However, the two groups are still treated as separate units. In the model t0 is set to 2022 for quota 2, as the application deadline in 2021 was before the strike. 

\newpage

## Figure S11: Robustness - All priority 1 applications (not split in quota 1 & 2)

```{r, include=FALSE}
# Robustness: all first-priority applications to the nurse program (no quota
# split); professional bachelor donors
gsk_df <- filter(nurse_df_total, uddannelsesniveau == "Prof. bachelor")

fit_nurse_k1_all <- gsynth(
  data = gsk_df,
  index = c("uddannelse", "år"),
  Y = "ansøgninger_prio1",
  D = "d",
  X = "andel_kvinder_prio1",
  estimator = "mc",
  force = "two-way",
  nlambda = 20,
  min.T0 = 5,
  na.rm = TRUE,
  se = TRUE,
  nboots = 1000,
  seed = 4
)

```

```{r}
# Panels A and B for the model without quota split, side by side
nurse_k1_all <- report_results(fit_nurse_k1_all, "Sygeplejerske", t0 = 2020)

nurse_k1_all[[1]] | nurse_k1_all[[2]]
```
Panel A of the figure shows the observed number of first-priority applications as well as the synthetic control estimate of these. Additionally, the individual donor units are shown in shaded grey. Panel B shows the yearly effect estimate (ATT) with 95%-confidence interval in shaded grey.

In the model all first-priority applicants to the nursing program are included, and as such we do not differentiate between quotas as in the main model.
\newpage

## Figure S12: Robustness - All levels of educations included as donors

```{r, include=FALSE}
# Robustness: the full nurse_df_k1 (all education levels) as donor pool
fit_nurse_k1_all_levels <- gsynth(
  data = nurse_df_k1,
  index = c("uddannelse", "år"),
  Y = "ansøgninger_main",
  D = "d",
  X = "andel_kvinder_prio1",
  estimator = "mc",
  force = "two-way",
  nlambda = 20,
  min.T0 = 5,
  na.rm = TRUE,
  se = TRUE,
  nboots = 1000,
  seed = 4
)

```

```{r}
# Panels A and B for the all-levels donor pool, side by side
nurse_k1_all_levels <- report_results(
  fit_nurse_k1_all_levels, "Sygeplejerske (kvote 1)", t0 = 2020)

nurse_k1_all_levels[[1]] | nurse_k1_all_levels[[2]]
```
Panel A of the figure shows the observed number of first-priority applications as well as the synthetic control estimate of these. Additionally, the individual donor units are shown in shaded grey. Panel B shows the yearly effect estimate (ATT) with 95%-confidence interval in shaded grey.

In the model all levels of education programs are included in the donor pool. As such, we do not limit the donor pool to only Professional Bachelor's Degrees as is done in the main model.
\newpage

## Figure S13: Robustness - IFE estimator (crossvalidated, r = 1)

```{r, include=FALSE}
# Donor pool for Figure S13: keep only rows whose `uddannelsesniveau` is
# "Prof. bachelor".
gsk_df <- nurse_df_k1 |>
  filter(uddannelsesniveau == "Prof. bachelor")

# Robustness fit for Figure S13. No `estimator` argument is passed, so gsynth's
# default estimator applies (the text below refers to it as the IFE estimator).
# `CV = TRUE` with no explicit `r` leaves the number of factors to gsynth's
# cross-validation; `EM = TRUE` and parametric inference are requested
# explicitly. Note `min.T0 = 7` here, versus 5 in the "mc" fits.
# NOTE(review): `nlambda` looks specific to the "mc" estimator; confirm it is
# a no-op for this fit.
# The dangling comma after the last argument was removed: it passed an empty
# extra argument to gsynth().
fit_nurse_k1_ife_cv <- gsynth(
  Y = "ansøgninger_main",
  D = "d",
  X = c("andel_kvinder_prio1"),
  data = gsk_df,
  index = c("uddannelse", "år"),
  na.rm = TRUE,
  EM = TRUE,
  se = TRUE,
  CV = TRUE,
  force = "two-way",
  inference = "parametric",
  min.T0 = 7,
  nboots = 1000,
  seed = 4,
  nlambda = 20
)
```

```{r}
# Summarise the cross-validated IFE fit with the shared reporting helper.
nurse_k1_ife_cv <- report_results(
  education_name = "Sygeplejerske (kvote 1)",
  t0 = 2020,
  fit = fit_nurse_k1_ife_cv
)

# Show the first two returned elements next to each other (the caption below
# calls them Panel A and Panel B).
nurse_k1_ife_cv[[1]] | nurse_k1_ife_cv[[2]]
```
Panel A of the figure shows the observed number of first-priority applications as well as their synthetic control estimate. Additionally, the individual donor units are shown in shaded grey. Panel B shows the yearly effect estimate (ATT) with a 95% confidence interval in shaded grey.

Here we use the alternative IFE-estimator of the synthetic control. The r-hyperparameter is set to 1, which is obtained through cross-validation.
\newpage

## Figure S14: Robustness - IFE estimator (r = 5)

```{r, include=FALSE}
# Donor pool for Figure S14: keep only rows whose `uddannelsesniveau` is
# "Prof. bachelor".
gsk_df <- nurse_df_k1 |>
  filter(uddannelsesniveau == "Prof. bachelor")

# Robustness fit for Figure S14. No `estimator` argument is passed, so gsynth's
# default estimator applies (the text below refers to it as the IFE estimator).
# Cross-validation is switched off (`CV = FALSE`) and the number of factors is
# fixed at `r = 5`. Note `min.T0 = 7` here, versus 5 in the "mc" fits.
# NOTE(review): `nlambda` looks specific to the "mc" estimator; confirm it is
# a no-op for this fit.
# The dangling comma after the last argument was removed: it passed an empty
# extra argument to gsynth().
fit_nurse_k1_ife_5 <- gsynth(
  Y = "ansøgninger_main",
  D = "d",
  X = c("andel_kvinder_prio1"),
  data = gsk_df,
  index = c("uddannelse", "år"),
  na.rm = TRUE,
  EM = TRUE,
  se = TRUE,
  CV = FALSE,
  r = 5,
  force = "two-way",
  inference = "parametric",
  min.T0 = 7,
  nboots = 1000,
  seed = 4,
  nlambda = 20
)
```

```{r}
# Summarise the fixed-r (r = 5) IFE fit with the shared reporting helper.
nurse_k1_ife_5 <- report_results(
  education_name = "Sygeplejerske (kvote 1)",
  t0 = 2020,
  fit = fit_nurse_k1_ife_5
)

# Show the first two returned elements next to each other (the caption below
# calls them Panel A and Panel B).
nurse_k1_ife_5[[1]] | nurse_k1_ife_5[[2]]
```
Panel A of the figure shows the observed number of first-priority applications as well as their synthetic control estimate. Additionally, the individual donor units are shown in shaded grey. Panel B shows the yearly effect estimate (ATT) with a 95% confidence interval in shaded grey.

Here we use the alternative IFE-estimator of the synthetic control. The r-hyperparameter is set to the maximum value of 5.
\newpage

## Figure S15: Robustness - Total quota 1 applications as outcome

```{r, include=FALSE}
# Donor pool for Figure S15: keep only rows whose `uddannelsesniveau` is
# "Prof. bachelor".
gsk_df <- filter(nurse_df_k1, uddannelsesniveau == "Prof. bachelor")

# Robustness fit for Figure S15: same "mc" specification as the other
# robustness fits, but with `ansøgninger_robust_total` as the outcome column.
fit_nurse_k1_total <- gsynth(
  data = gsk_df,
  index = c("uddannelse", "år"),   # unit id, time id
  Y = "ansøgninger_robust_total",  # alternative outcome column
  D = "d",                         # treatment indicator column
  X = "andel_kvinder_prio1",       # single covariate
  estimator = "mc",
  force = "two-way",
  min.T0 = 5,
  na.rm = TRUE,
  se = TRUE,
  nboots = 1000,
  seed = 4,
  nlambda = 20
)

```

```{r}
# Summarise the total-applications fit with the shared reporting helper.
nurse_k1_total <- report_results(
  education_name = "Sygeplejerske (kvote 1)",
  t0 = 2020,
  fit = fit_nurse_k1_total
)

# Show the first two returned elements next to each other (the caption below
# calls them Panel A and Panel B).
nurse_k1_total[[1]] | nurse_k1_total[[2]]
```
Panel A of the figure shows the observed number of first-priority applications as well as their synthetic control estimate. Additionally, the individual donor units are shown in shaded grey. Panel B shows the yearly effect estimate (ATT) with a 95% confidence interval in shaded grey.

Here we use total quota 1-applications as an alternative outcome-measure.
\newpage

## Figure S16: Robustness - Balanced panel

```{r, include=FALSE}
# Donor pool for Figure S16: "Prof. bachelor" rows only, then keep the units
# (`uddannelse`) that contribute at least 28 rows. The per-unit row count is
# left in the data as column `n`.
# NOTE(review): 28 presumably equals the number of years in the 1997-2024
# panel; confirm if the sample window changes.
gsk_df <- nurse_df_k1 |>
  filter(uddannelsesniveau == "Prof. bachelor") |>
  group_by(uddannelse) |>
  mutate(n = n()) |>
  filter(n >= 28) |>
  ungroup()

# Robustness fit for Figure S16: same "mc" specification as the other
# robustness fits, estimated on the restricted donor pool.
fit_nurse_k1_balanced <- gsynth(
  data = gsk_df,
  index = c("uddannelse", "år"),   # unit id, time id
  Y = "ansøgninger_main",          # outcome column
  D = "d",                         # treatment indicator column
  X = "andel_kvinder_prio1",       # single covariate
  estimator = "mc",
  force = "two-way",
  min.T0 = 5,
  na.rm = TRUE,
  se = TRUE,
  nboots = 1000,
  seed = 4,
  nlambda = 20
)

```

```{r}
# Summarise the balanced-panel fit with the shared reporting helper.
nurse_k1_balanced <- report_results(
  education_name = "Sygeplejerske (kvote 1)",
  t0 = 2020,
  fit = fit_nurse_k1_balanced
)

# Show the first two returned elements next to each other (the caption below
# calls them Panel A and Panel B).
nurse_k1_balanced[[1]] | nurse_k1_balanced[[2]]
```
Panel A of the figure shows the observed number of first-priority applications as well as their synthetic control estimate. Additionally, the individual donor units are shown in shaded grey. Panel B shows the yearly effect estimate (ATT) with a 95% confidence interval in shaded grey.

In this model we only include a balanced panel of donor units, i.e. we only include donor units that are observed throughout the entire period.
\newpage


## Table S5: Implied donor unit weights (from IFE)

```{r}

# Lookup from the Danish programme names used in the data (`uddannelse`) to the
# English labels shown in the table. Keys must match the data exactly,
# including spacing and punctuation.
translations <- c(
  "Diplomingeniør, prof. bach." = "Bachelor of Engineering",
  "Socialrådgiver, prof. bach." = "Social Worker",
  "Bygningskonstruktør, prof. bach." = "Architectural Technologist",
  "Animation, prof. bach." = "Animation",
  "Professionsbachelor i finans, prof. bach." = "Finance",
  "Event management og økonomi, prof. bach." = "Event Management and Economics",
  "Multiplatform storytelling and production, prof. bach." = "Multiplatform Storytelling and Production",
  "Global Nutrition and Health, prof. bach." = "Global Nutrition and Health",
  "Diakoni og socialpædagogik, prof. bach." = "Diaconia and Social Education",
  "Skat, prof. bach." = "Taxation",
  "Smykker, teknologi og business, prof. bach." = "Jewellery, Technology and Business",
  "Bioanalytiker, prof. bach." = "Biomedical Laboratory Scientist",
  "Offentlig administration, prof. bach." = "Public Administration",
  "Fødevareteknologi, prof. bach." = "Food Technology",
  "Fri, høj- og efterskolelærer, prof. bach." = "Independent, Folk High School and Continuation School Teacher",
  "Fremmedsprog og digital markedskommunikation, prof. bach." = "Foreign Languages and Digital Marketing Communication",
  "Optometri, prof. bach." = "Optometry",
  "Natur- og kulturformidling, prof. bach." = "Nature and Cultural Communication",
  "Skibsofficer, prof. bach." = "Ship Officer",
  "Radiograf, prof. bach." = "Radiographer",
  "Økonomi og informationsteknologi, prof. bach." = "Economics and Information Technology",
  "Kristendom, kultur og kommunikation, prof. bach." = "Christianity, Culture and Communication",
  "Procesøkonomi og værdikædeledelse, prof. bach." = "Process Economics and Value Chain Management",
  "Dansk tegnsprog og tolkning, prof. bach." = "Danish Sign Language and Interpretation",
  "Medie- og Sonokommunikation, prof. bach." = "Media and Sonic Communication",
  "Pædagog, prof. bach." = "Pedagogue",
  "Tandplejer, prof. bach." = "Dental Hygienist",
  "Have- og parkingeniør, prof. bach." = "Horticulture and Park Engineering",
  "Ernæring og sundhed, prof. bach." = "Nutrition and Health",
  "Afspændingspædagogik og psykomotorik, prof. bach." = "Psychomotor Therapy",
  "Maskinmester, prof. bach." = "Marine Engineer",
  "Medieproduktion og ledelse, prof. bach." = "Media Production and Management",
  "Beredskab -,  katastrofe- og risikomanagement, prof. bach." = "Emergency, Disaster and Risk Management",
  "Skov- og landskabsingeniør, prof. bach." = "Forest and Landscape Engineer",
  "Tekstildesign, -håndværk og formidling, prof. bach." = "Textile Design, Craft and Communication",
  "Kommunikation, prof. bach." = "Communication",
  "Jordemoder, prof. bach." = "Midwife",
  "Fysioterapeut, prof. bach." = "Physiotherapist",
  "Skibsfører, prof. bach." = "Ship Master",
  "Eksport og teknologi, prof. bach." = "Export and Technology",
  "Fotojournalist, prof. bach." = "Photojournalist",
  "Ergoterapeut, prof. bach." = "Occupational Therapist",
  "Visuel kommunikation, prof. bach." = "Visual Communication",
  "Grafisk Kommunikation, prof. bach." = "Graphic Communication",
  "TV- og medietilrettelægger, prof. bach." = "TV and Media Planning",
  "Journalist, prof. bach." = "Journalist"
)

# Table of implied donor weights from the r = 5 IFE fit: one row per donor
# unit (taken from the row names of `wgt.implied`), sorted by weight.
fit_nurse_k1_ife_5[["wgt.implied"]] |>
  as.data.frame() |>
  tibble::rownames_to_column("Education") |>
  # Translate the label; a donor missing from `translations` keeps its
  # original Danish name instead of turning into NA in the printed table.
  mutate(
    Education = coalesce(unname(translations[Education]), Education)
  ) |>
  rename(Weight = `Sygeplejerske (kvote 1)`) |>
  arrange(desc(Weight)) |>
  kableExtra::kable() |>
  kableExtra::kable_styling(latex_options = c("scale_down", "HOLD_position"))
```
The table shows the implied donor unit weights for all donor units using the alternative IFE-estimator. These weights are mainly for illustrative purposes, as the IFE-estimator is not used in the main analysis. However, as shown in Figures S13 and S14, the results are reproduced using the IFE-estimator.
\newpage

## Figure S17: Robustness - Excluding diploma engineering

```{r, include=FALSE}
# Donor pool for Figure S17: "Prof. bachelor" rows, minus the unit
# "Diplomingeniør, prof. bach.".
gsk_df <- nurse_df_k1 |>
  filter(
    uddannelsesniveau == "Prof. bachelor",
    uddannelse != "Diplomingeniør, prof. bach."
  )

# Robustness fit for Figure S17: same "mc" specification as the other
# robustness fits, estimated without the excluded donor.
fit_nurse_k1_excluding_ing <- gsynth(
  data = gsk_df,
  index = c("uddannelse", "år"),   # unit id, time id
  Y = "ansøgninger_main",          # outcome column
  D = "d",                         # treatment indicator column
  X = "andel_kvinder_prio1",       # single covariate
  estimator = "mc",
  force = "two-way",
  min.T0 = 5,
  na.rm = TRUE,
  se = TRUE,
  nboots = 1000,
  seed = 4,
  nlambda = 20
)

```


```{r}
# Summarise the fit that excludes "Diplomingeniør" from the donor pool.
# The result is stored under a name matching its fit (as in the other
# robustness chunks) rather than `nurse_k1_main`, so this chunk no longer
# overwrites an object with that name.
nurse_k1_excluding_ing <- report_results(
  fit = fit_nurse_k1_excluding_ing,
  education_name = "Sygeplejerske (kvote 1)",
  t0 = 2020
)

# Show the first two returned elements next to each other (the caption below
# calls them Panel A and Panel B).
nurse_k1_excluding_ing[[1]] | nurse_k1_excluding_ing[[2]]
```
Panel A of the figure shows the observed number of first-priority applications as well as their synthetic control estimate. Additionally, the individual donor units are shown in shaded grey. Panel B shows the yearly effect estimate (ATT) with a 95% confidence interval in shaded grey.

The diploma engineering program achieves a large placebo-estimate, indicating that it might not be suitable for inclusion as a donor unit. Therefore, we assess whether the results are robust to excluding it.
\newpage

## Figure S18: Robustness - Excluding social education

```{r, include=FALSE}
# Donor pool for Figure S18: "Prof. bachelor" rows, minus the unit
# "Pædagog, prof. bach.".
gsk_df <- nurse_df_k1 |>
  filter(uddannelsesniveau == "Prof. bachelor" & uddannelse != "Pædagog, prof. bach.")


# Robustness fit for Figure S18. Stored under its own name: the previous
# copy-pasted name (`fit_nurse_k1_excluding_ing`) overwrote the Figure S17 fit
# and mislabelled which donor was dropped.
fit_nurse_k1_excluding_paed <- gsynth(
  Y = "ansøgninger_main",
  D = "d",
  X = c("andel_kvinder_prio1"),
  data = gsk_df,
  index = c("uddannelse", "år"),
  na.rm = TRUE,
  se = TRUE,
  force = "two-way",
  estimator = "mc",
  min.T0 = 5,
  nboots = 1000,
  seed = 4,
  nlambda = 20
)

```


```{r}
# Summarise the fit that excludes "Pædagog" from the donor pool. The result
# name follows the fit name instead of reusing `nurse_k1_main`.
nurse_k1_excluding_paed <- report_results(
  fit = fit_nurse_k1_excluding_paed,
  education_name = "Sygeplejerske (kvote 1)",
  t0 = 2020
)

# Show the first two returned elements next to each other (the caption below
# calls them Panel A and Panel B).
nurse_k1_excluding_paed[[1]] | nurse_k1_excluding_paed[[2]]
```
Panel A of the figure shows the observed number of first-priority applications as well as their synthetic control estimate. Additionally, the individual donor units are shown in shaded grey. Panel B shows the yearly effect estimate (ATT) with a 95% confidence interval in shaded grey.

The social education program achieves a large placebo-estimate. To probe whether the results are robust to excluding it, we re-estimate the model with this unit excluded from the donor pool.
\newpage

## Figure S19: Robustness - Alternative imputing

```{r, include=FALSE}
# Donor pool for Figure S19: keep only rows whose `uddannelsesniveau` is
# "Prof. bachelor".
gsk_df <- nurse_df_k1 |>
  filter(uddannelsesniveau == "Prof. bachelor")


# Robustness fit for Figure S19, using `ansøgninger_main_robust` as the
# outcome column. Stored under its own name: it was previously assigned to
# `fit_nurse_k1_main`, which overwrote any object of that name with this
# alternative-outcome fit.
fit_nurse_k1_alt_impute <- gsynth(
  Y = "ansøgninger_main_robust",
  D = "d",
  X = c("andel_kvinder_prio1"),
  data = gsk_df,
  index = c("uddannelse", "år"),
  na.rm = TRUE,
  se = TRUE,
  force = "two-way",
  estimator = "mc",
  min.T0 = 5,
  nboots = 1000,
  seed = 4,
  nlambda = 20
)

```

```{r}
# Summarise the alternative-imputation fit. The result name follows the fit
# name instead of reusing `nurse_k1_main`.
nurse_k1_alt_impute <- report_results(
  fit = fit_nurse_k1_alt_impute,
  education_name = "Sygeplejerske (kvote 1)",
  t0 = 2020
)

# Show the first two returned elements next to each other (the caption below
# calls them Panel A and Panel B).
nurse_k1_alt_impute[[1]] | nurse_k1_alt_impute[[2]]
```

Panel A of the figure shows the observed number of first-priority applications as well as their synthetic control estimate. Additionally, the individual donor units are shown in shaded grey. Panel B shows the yearly effect estimate (ATT) with a 95% confidence interval in shaded grey.

Here we use the alternative imputation-method described below figure S1.
